{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyMZHViR6npRf89UzVPvDKHG",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/sugarforever/LlamaIndex-Tutorials/blob/main/03_Customization/03_Customization.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3gyCNIM1Im7-"
      },
      "outputs": [],
      "source": [
        "!pip install -q -U llama-index chromadb"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!mkdir data\n",
        "!wget https://raw.githubusercontent.com/jerryjliu/llama_index/main/examples/paul_graham_essay/data/paul_graham_essay.txt -O data/paul_graham_essay.txt"
      ],
      "metadata": {
        "id": "5a1h2iL3I5Fs"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install openai"
      ],
      "metadata": {
        "id": "DAx-uT-XLgTf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "os.environ['OPENAI_API_KEY'] = 'your valid openai api key'\n",
        "\n",
        "import chromadb\n",
        "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
        "from llama_index import ServiceContext\n",
        "from llama_index.vector_stores import ChromaVectorStore\n",
        "from llama_index import StorageContext\n",
        "from llama_index.llms import OpenAI"
      ],
      "metadata": {
        "id": "lmaZiSPUJSUw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 自定义文档分块，指定 LLM"
      ],
      "metadata": {
        "id": "vs48EWb3KGjh"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "service_context = ServiceContext.from_defaults(chunk_size=500, llm=OpenAI())\n"
      ],
      "metadata": {
        "id": "mWbUr_INJMbH"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 自定义向量存储"
      ],
      "metadata": {
        "id": "-web3MtHKiaN"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "chroma_client = chromadb.PersistentClient()\n",
        "chroma_collection = chroma_client.create_collection(\"quickstart\")\n",
        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
        "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
      ],
      "metadata": {
        "id": "TOKP_4ljKl55"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 索引文档"
      ],
      "metadata": {
        "id": "jxuR1Z21KyTJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "documents = SimpleDirectoryReader('data').load_data()\n",
        "index = VectorStoreIndex.from_documents(documents, service_context=service_context,storage_context=storage_context)"
      ],
      "metadata": {
        "id": "JF9D7myqKf6f"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 指定响应模式，以及启用流式响应"
      ],
      "metadata": {
        "id": "F2nCITY8LP5r"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "query_engine = index.as_query_engine(response_mode='tree_summarize', streaming=True)\n",
        "response = query_engine.query(\"What did the author do?\")\n",
        "response.print_response_stream()"
      ],
      "metadata": {
        "id": "4Z_ainQLJOcd"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}